United States Car Accident Project
1 Introduction
1.1 Motivation for the Project - James
1.2 Research Questions - James
1.3 Overview of Modeling Techniques - Me
Binary Response Model
Linear Regression
Lasso Regression
Ridge Regression
Ordinal Logit Regression
Decision Tree
Random Forest
Artificial Neural Network
2 Raw Data
3 Preparing Data For Machine Learning
# Keep the response (Severity) and the eleven candidate predictors.
accident_12var <- accident_raw %>%
  select(
    Severity, State, `Temperature(F)`, `Humidity(%)`,
    `Visibility(mi)`, `Wind_Speed(mph)`, Weather_Condition,
    `Precipitation(in)`, Crossing, Junction, Traffic_Signal,
    Sunrise_Sunset
  )
# Replace every "(", ")" and "%" in the column names with "." so the names
# are syntactic, e.g. `Temperature(F)` -> Temperature.F. and
# `Humidity(%)` -> Humidity...
colnames(accident_12var) <- gsub("\\)|\\%|\\(", ".", colnames(accident_12var))
library(caret)
library(recipes)
library(dplyr)
# Split the data into training (80%) and testing (20%) sets
set.seed(2)
train_indices <- createDataPartition(accident_12var$Severity, p = 0.8, list = FALSE)
train_set <- accident_12var[train_indices, ]
test_set <- accident_12var[-train_indices, ]

# Numeric and categorical attributes, named explicitly rather than by position
num_attribs <- c(
  "Temperature.F.", "Humidity...", "Visibility.mi.",
  "Wind_Speed.mph.", "Precipitation.in."
)
cat_attribs <- c(
  "State", "Weather_Condition", "Crossing", "Junction",
  "Traffic_Signal", "Sunrise_Sunset"
)

# TRAIN SET
# Work on a plain data.frame copy and separate the label from the features
copied_traindata <- data.frame(train_set)
accident <- select(copied_traindata, all_of(c(num_attribs, cat_attribs)))
label <- copied_traindata$Severity

# TEST SET
copied_testdata <- data.frame(test_set)
accident_test <- select(copied_testdata, all_of(c(num_attribs, cat_attribs)))
label_test <- copied_testdata$Severity

# One preprocessing recipe, estimated on the training features only:
#  * numeric attributes: median imputation, then centring and scaling
#  * nominal attributes: dummy encoding
# step_novel() gives every nominal attribute a spare level for values that
# only occur in new data, and step_zv() then removes indicator columns that
# are constant in the training data (which includes those spare levels).
# Train and test therefore always end up with the same columns.
preprocess_recipe <- recipe(~., data = accident) %>%
  step_impute_median(all_of(num_attribs)) %>%
  step_center(all_of(num_attribs)) %>%
  step_scale(all_of(num_attribs)) %>%
  step_novel(all_nominal()) %>%
  step_dummy(all_nominal()) %>%
  step_zv(all_predictors())
preprocess_fit <- prep(preprocess_recipe, training = accident)

# Apply the training-set medians, means, standard deviations and factor
# levels to both sets. Row order is untouched, so `label` / `label_test`
# stay aligned with the prepared features.
accident_prepared <- as.data.frame(bake(preprocess_fit, new_data = NULL))
accident_prepared_test <- as.data.frame(bake(preprocess_fit, new_data = accident_test))
4 Models
4.1 Linear Regression
# Ordinary least squares: regress the training label on every prepared feature
lin_reg <- lm(label ~ ., data = accident_prepared)
# Predictions for the held-out rows
y_pred <- predict(lin_reg, newdata = accident_prepared_test)
# Prediction error per test row (predicted minus observed) and its square
residuals <- y_pred - label_test
squared_errors <- residuals * residuals
# Test-set mean squared error
mse <- mean(squared_errors)
# Report the MSE
cat("MSE:", mse)
## MSE: 0.1324914
4.2 Ridge Regression
# Ridge regression
library(glmnet)
# Response vector and predictor matrix.
# `label` is not a column of accident_prepared, so nothing has to be removed;
# select(-label) would use the values of the external vector `label` as
# column positions and silently drop predictors.
y <- label
X <- as.matrix(accident_prepared)
# Define the lambda sequence for ridge regression (10^10 down to 10^-2)
lambda_seq <- 10^seq(10, -2, length.out = 100)
# Perform cross-validated ridge regression (alpha = 0 selects the ridge penalty)
ridge_fit <- cv.glmnet(X, y, alpha = 0, lambda = lambda_seq)
# Plot the cross-validation results
plot(ridge_fit)
# Coefficients without the intercept
ridge_coef <- coef(ridge_fit)[-1]
# Build the test matrix with exactly the training columns, in training order.
# An indicator column that is absent from the test data is all zero there.
X_test <- as.matrix(accident_prepared_test)
absent_cols <- setdiff(colnames(X), colnames(X_test))
if (length(absent_cols) > 0) {
  filler <- matrix(
    0,
    nrow = nrow(X_test), ncol = length(absent_cols),
    dimnames = list(NULL, absent_cols)
  )
  X_test <- cbind(X_test, filler)
}
X_test <- X_test[, colnames(X), drop = FALSE]
# Evaluate on the held-out data, like the other models in this section
y_pred <- predict(ridge_fit, newx = X_test)
mse <- mean((label_test - y_pred)^2)
# Print the MSE
cat("MSE:", mse)
## MSE: 0.1341102
4.3 Lasso Regression
# Design matrix (model.matrix adds an "(Intercept)" column) and response
x <- model.matrix(~., data = accident_prepared)
y <- label
# Fit a Lasso regression with cross-validation (alpha = 1 selects the lasso penalty)
lasso_model <- cv.glmnet(x, y, alpha = 1)
# Drop test columns the model was never trained on
extra_columns <- setdiff(colnames(accident_prepared_test), colnames(accident_prepared))
accident_prepared_test <- accident_prepared_test %>%
  select(-any_of(extra_columns))
# Build the test design matrix and align it with the training matrix:
# a training column that is absent from the test data is filled with zeros,
# so predict() always receives the columns it was fitted on, in the same order.
x_test <- model.matrix(~., data = accident_prepared_test)
absent_cols <- setdiff(colnames(x), colnames(x_test))
if (length(absent_cols) > 0) {
  filler <- matrix(
    0,
    nrow = nrow(x_test), ncol = length(absent_cols),
    dimnames = list(NULL, absent_cols)
  )
  x_test <- cbind(x_test, filler)
}
x_test <- x_test[, colnames(x), drop = FALSE]
# Predict the response variable using the test data
y_pred <- predict(lasso_model, newx = x_test)
# Calculate the MSE
mse <- mean((y_pred - label_test)^2)
# Print the MSE
cat("MSE:", mse)
## MSE: 0.134835
plot(lasso_model)
5 Results
5.1 Linear Regression Results
library(stargazer)
# HTML regression table for the linear model: 90% confidence intervals are
# shown instead of standard errors, on the same row as each estimate.
stargazer(
  lin_reg,
  type = "html",
  title = "Regression Results",
  ci = TRUE,
  ci.level = 0.90,
  single.row = TRUE
)
##
## <table style="text-align:center"><caption><strong>Regression Results</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td>label</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Temperature.F.</td><td>0.002 (-0.001, 0.005)</td></tr>
## <tr><td style="text-align:left">Humidity...</td><td>0.007<sup>***</sup> (0.004, 0.010)</td></tr>
## <tr><td style="text-align:left">Visibility.mi.</td><td>-0.002 (-0.005, 0.002)</td></tr>
## <tr><td style="text-align:left">Wind_Speed.mph.</td><td>-0.001 (-0.004, 0.002)</td></tr>
## <tr><td style="text-align:left">Precipitation.in.</td><td>-0.0002 (-0.003, 0.002)</td></tr>
## <tr><td style="text-align:left">Crossing</td><td>-0.036<sup>***</sup> (-0.043, -0.029)</td></tr>
## <tr><td style="text-align:left">Junction</td><td>0.019 (-0.014, 0.052)</td></tr>
## <tr><td style="text-align:left">Traffic_Signal</td><td>-0.014<sup>***</sup> (-0.021, -0.007)</td></tr>
## <tr><td style="text-align:left">State_AR</td><td>0.150<sup>***</sup> (0.101, 0.199)</td></tr>
## <tr><td style="text-align:left">State_AZ</td><td>-0.199<sup>***</sup> (-0.233, -0.165)</td></tr>
## <tr><td style="text-align:left">State_CA</td><td>-0.104<sup>***</sup> (-0.135, -0.074)</td></tr>
## <tr><td style="text-align:left">State_CO</td><td>0.554<sup>***</sup> (0.506, 0.602)</td></tr>
## <tr><td style="text-align:left">State_CT</td><td>0.503<sup>***</sup> (0.451, 0.555)</td></tr>
## <tr><td style="text-align:left">State_DC</td><td>0.149<sup>***</sup> (0.103, 0.196)</td></tr>
## <tr><td style="text-align:left">State_DE</td><td>0.375<sup>***</sup> (0.308, 0.441)</td></tr>
## <tr><td style="text-align:left">State_FL</td><td>-0.097<sup>***</sup> (-0.127, -0.066)</td></tr>
## <tr><td style="text-align:left">State_GA</td><td>0.601<sup>***</sup> (0.552, 0.650)</td></tr>
## <tr><td style="text-align:left">State_IA</td><td>0.400<sup>***</sup> (0.336, 0.463)</td></tr>
## <tr><td style="text-align:left">State_ID</td><td>0.006 (-0.042, 0.053)</td></tr>
## <tr><td style="text-align:left">State_IL</td><td>0.290<sup>***</sup> (0.254, 0.327)</td></tr>
## <tr><td style="text-align:left">State_IN</td><td>0.594<sup>***</sup> (0.540, 0.648)</td></tr>
## <tr><td style="text-align:left">State_KS</td><td>0.112 (-0.007, 0.231)</td></tr>
## <tr><td style="text-align:left">State_KY</td><td>0.024 (-0.044, 0.092)</td></tr>
## <tr><td style="text-align:left">State_LA</td><td>-0.119<sup>***</sup> (-0.152, -0.086)</td></tr>
## <tr><td style="text-align:left">State_MA</td><td>0.538<sup>***</sup> (0.447, 0.630)</td></tr>
## <tr><td style="text-align:left">State_MD</td><td>0.193<sup>***</sup> (0.157, 0.229)</td></tr>
## <tr><td style="text-align:left">State_ME</td><td>-0.162<sup>*</sup> (-0.316, -0.009)</td></tr>
## <tr><td style="text-align:left">State_MI</td><td>0.084<sup>***</sup> (0.048, 0.120)</td></tr>
## <tr><td style="text-align:left">State_MN</td><td>-0.105<sup>***</sup> (-0.138, -0.072)</td></tr>
## <tr><td style="text-align:left">State_MO</td><td>0.006 (-0.039, 0.050)</td></tr>
## <tr><td style="text-align:left">State_MS</td><td>0.135<sup>***</sup> (0.057, 0.214)</td></tr>
## <tr><td style="text-align:left">State_MT</td><td>-0.094<sup>***</sup> (-0.133, -0.056)</td></tr>
## <tr><td style="text-align:left">State_NC</td><td>-0.018 (-0.049, 0.014)</td></tr>
## <tr><td style="text-align:left">State_ND</td><td>-0.119<sup>*</sup> (-0.220, -0.019)</td></tr>
## <tr><td style="text-align:left">State_NE</td><td>0.311<sup>***</sup> (0.215, 0.407)</td></tr>
## <tr><td style="text-align:left">State_NH</td><td>0.617<sup>***</sup> (0.477, 0.758)</td></tr>
## <tr><td style="text-align:left">State_NJ</td><td>0.271<sup>***</sup> (0.233, 0.310)</td></tr>
## <tr><td style="text-align:left">State_NM</td><td>-0.101 (-0.245, 0.044)</td></tr>
## <tr><td style="text-align:left">State_NV</td><td>0.196<sup>***</sup> (0.112, 0.280)</td></tr>
## <tr><td style="text-align:left">State_NY</td><td>0.157<sup>***</sup> (0.124, 0.190)</td></tr>
## <tr><td style="text-align:left">State_OH</td><td>0.030 (-0.010, 0.069)</td></tr>
## <tr><td style="text-align:left">State_OK</td><td>-0.008 (-0.050, 0.035)</td></tr>
## <tr><td style="text-align:left">State_OR</td><td>-0.053<sup>***</sup> (-0.084, -0.021)</td></tr>
## <tr><td style="text-align:left">State_PA</td><td>0.012 (-0.020, 0.043)</td></tr>
## <tr><td style="text-align:left">State_RI</td><td>0.109 (-0.092, 0.311)</td></tr>
## <tr><td style="text-align:left">State_SC</td><td>-0.098<sup>***</sup> (-0.130, -0.067)</td></tr>
## <tr><td style="text-align:left">State_SD</td><td>0.078 (-0.105, 0.261)</td></tr>
## <tr><td style="text-align:left">State_TN</td><td>-0.087<sup>***</sup> (-0.120, -0.053)</td></tr>
## <tr><td style="text-align:left">State_TX</td><td>-0.093<sup>***</sup> (-0.125, -0.062)</td></tr>
## <tr><td style="text-align:left">State_UT</td><td>-0.057<sup>***</sup> (-0.094, -0.021)</td></tr>
## <tr><td style="text-align:left">State_VA</td><td>0.071<sup>***</sup> (0.038, 0.103)</td></tr>
## <tr><td style="text-align:left">State_VT</td><td>-0.236<sup>**</sup> (-0.428, -0.043)</td></tr>
## <tr><td style="text-align:left">State_WA</td><td>0.242<sup>***</sup> (0.202, 0.281)</td></tr>
## <tr><td style="text-align:left">State_WI</td><td>1.430<sup>***</sup> (1.339, 1.520)</td></tr>
## <tr><td style="text-align:left">State_WV</td><td>0.066 (-0.005, 0.136)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Blowing.Dust...Windy</td><td>0.023 (-0.457, 0.503)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Blowing.Snow...Windy</td><td>-0.175 (-0.815, 0.466)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Clear</td><td>0.273 (-0.062, 0.607)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Cloudy</td><td>0.015 (-0.212, 0.241)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Cloudy...Windy</td><td>0.028 (-0.201, 0.258)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Drizzle</td><td>0.136 (-0.110, 0.381)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Drizzle.and.Fog</td><td>-0.068 (-0.444, 0.307)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Fair</td><td>0.020 (-0.207, 0.246)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Fair...Windy</td><td>0.052 (-0.176, 0.280)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Fog</td><td>0.007 (-0.220, 0.234)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Fog...Windy</td><td>-0.007 (-0.340, 0.327)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Freezing.Drizzle</td><td>-0.183 (-0.823, 0.457)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Haze</td><td>0.010 (-0.217, 0.238)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Haze...Windy</td><td>0.057 (-0.202, 0.315)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Heavy.Drizzle</td><td>-0.057 (-0.470, 0.356)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Heavy.Rain</td><td>0.008 (-0.223, 0.238)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Heavy.Rain...Windy</td><td>0.196 (-0.099, 0.492)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Heavy.Sleet</td><td>0.030 (-0.610, 0.670)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Heavy.Snow</td><td>0.038 (-0.213, 0.290)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Heavy.Snow...Windy</td><td>0.017 (-0.463, 0.498)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Heavy.T.Storm</td><td>-0.008 (-0.241, 0.226)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Heavy.T.Storm...Windy</td><td>-0.165 (-0.467, 0.138)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Drizzle</td><td>0.040 (-0.192, 0.271)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Drizzle...Windy</td><td>0.659<sup>***</sup> (0.308, 1.010)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Freezing.Drizzle</td><td>-0.054 (-0.404, 0.297)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Freezing.Rain</td><td>0.252 (-0.050, 0.554)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Freezing.Rain...Windy</td><td>0.871<sup>***</sup> (0.496, 1.247)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Ice.Pellets</td><td>0.814<sup>***</sup> (0.334, 1.294)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Rain</td><td>0.029 (-0.198, 0.255)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Rain...Windy</td><td>0.057 (-0.177, 0.292)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Rain.Shower</td><td>0.143 (-0.233, 0.519)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Rain.with.Thunder</td><td>-0.007 (-0.238, 0.223)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Snow</td><td>0.067 (-0.161, 0.294)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Snow...Windy</td><td>0.048 (-0.194, 0.291)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Snow.and.Sleet</td><td>-0.011 (-0.651, 0.629)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Snow.Shower</td><td>-0.023 (-0.663, 0.618)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Light.Thunderstorms.and.Rain</td><td>0.613 (-0.028, 1.253)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Mist</td><td>0.123 (-0.162, 0.408)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Mostly.Cloudy</td><td>0.006 (-0.221, 0.232)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Mostly.Cloudy...Windy</td><td>0.022 (-0.208, 0.252)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_N.A.Precipitation</td><td>-0.091 (-0.345, 0.163)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Overcast</td><td>0.498<sup>***</sup> (0.263, 0.734)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Partly.Cloudy</td><td>0.016 (-0.211, 0.242)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Partly.Cloudy...Windy</td><td>-0.004 (-0.238, 0.230)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Patches.of.Fog</td><td>-0.003 (-0.255, 0.250)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Rain</td><td>0.014 (-0.214, 0.242)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Rain...Windy</td><td>0.036 (-0.231, 0.303)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Scattered.Clouds</td><td>-0.157 (-0.570, 0.256)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Shallow.Fog</td><td>-0.055 (-0.304, 0.194)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Showers.in.the.Vicinity</td><td>-0.080 (-0.493, 0.334)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Sleet</td><td>-0.215 (-0.695, 0.265)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Smoke</td><td>0.131 (-0.099, 0.360)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Smoke...Windy</td><td>1.000<sup>***</sup> (0.520, 1.480)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Snow</td><td>0.081 (-0.154, 0.316)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Snow...Windy</td><td>-0.002 (-0.322, 0.318)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Snow.and.Sleet</td><td>-0.119 (-0.759, 0.521)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_T.Storm</td><td>0.025 (-0.205, 0.255)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_T.Storm...Windy</td><td>-0.016 (-0.429, 0.397)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Thunder</td><td>0.011 (-0.219, 0.241)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Thunder...Windy</td><td>-0.025 (-0.315, 0.264)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Thunder...Wintry.Mix</td><td>-0.253 (-0.894, 0.388)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Thunder.and.Hail</td><td>0.004 (-0.636, 0.643)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Thunder.in.the.Vicinity</td><td>0.033 (-0.195, 0.262)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Widespread.Dust</td><td>0.005 (-0.635, 0.644)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Widespread.Dust...Windy</td><td>0.021 (-0.619, 0.661)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Wintry.Mix</td><td>0.040 (-0.194, 0.274)</td></tr>
## <tr><td style="text-align:left">Weather_Condition_Wintry.Mix...Windy</td><td>-0.008 (-0.648, 0.631)</td></tr>
## <tr><td style="text-align:left">Sunrise_Sunset_Night</td><td>0.012<sup>***</sup> (0.006, 0.017)</td></tr>
## <tr><td style="text-align:left">Constant</td><td>2.096<sup>***</sup> (1.867, 2.324)</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>75,921</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.108</td></tr>
## <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.107</td></tr>
## <tr><td style="text-align:left">Residual Std. Error</td><td>0.364 (df = 75797)</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>74.585<sup>***</sup> (df = 123; 75797)</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
library(coefplot)
library(broom)

# Dot-and-interval plot of coefficient estimates.
#   coef_data: rows of a broom::tidy(conf.int = TRUE) table
#   title:     plot title
# Terms are ordered by their estimate; the dashed red line marks zero.
plot_coef_estimates <- function(coef_data, title) {
  ggplot(coef_data, aes(x = estimate, y = reorder(term, estimate))) +
    geom_point(size = 2) +
    geom_errorbarh(aes(xmin = conf.low, xmax = conf.high)) +
    labs(x = "Coefficient Estimate", y = "Variable") +
    ggtitle(title) +
    theme_minimal() +
    theme(plot.title = element_text(hjust = 0.5)) +
    geom_vline(xintercept = 0, linetype = "dashed", color = "red")
}

# Rows `rows` of `coef_data`, ignoring positions past the last row so that
# no all-NA rows are produced.
coef_rows <- function(coef_data, rows) {
  coef_data[intersect(rows, seq_len(nrow(coef_data))), ]
}

# Extract coefficients with confidence intervals
coef_df <- tidy(lin_reg, conf.int = TRUE)
# Filter out intercept
coef_df <- coef_df[coef_df$term != "(Intercept)", ]
# Numeric attributes go in one plot; every remaining term is treated as
# categorical. Matching on "_" would also pick up Wind_Speed.mph. and would
# miss terms without an underscore.
is_numeric_term <- coef_df$term %in% num_attribs
num_coef_df <- coef_df[is_numeric_term, ]
cat_coef_df <- coef_df[!is_numeric_term, ]

# Create plots
plot_num <- plot_coef_estimates(
  num_coef_df,
  "Linear Regression Results for Numeric Variables"
)
plot_num

# Non-overlapping groups of 25 categorical terms, one plot per group
cat_coef_df1 <- coef_rows(cat_coef_df, 1:25)
cat_coef_df2 <- coef_rows(cat_coef_df, 26:50)
cat_coef_df3 <- coef_rows(cat_coef_df, 51:75)
cat_coef_df4 <- coef_rows(cat_coef_df, 76:100)
cat_coef_df5 <- coef_rows(cat_coef_df, 101:125)

# Create separate plots
cat_title <- "Linear Regression Results for Categorical Variables"
plot_cat1 <- plot_coef_estimates(cat_coef_df1, cat_title)
plot_cat1
plot_cat2 <- plot_coef_estimates(cat_coef_df2, cat_title)
plot_cat2
plot_cat3 <- plot_coef_estimates(cat_coef_df3, cat_title)
plot_cat3
plot_cat4 <- plot_coef_estimates(cat_coef_df4, cat_title)
plot_cat4
plot_cat5 <- plot_coef_estimates(cat_coef_df5, cat_title)
plot_cat5